[X86][AVX10.2] Fix wrong mask bits in cvtpbf8_ph intrinsics #120927

phoebewang · 2024-12-23T01:07:40Z

Found during review #120766

llvmbot · 2024-12-23T01:08:11Z

@llvm/pr-subscribers-backend-x86

@llvm/pr-subscribers-clang

Author: Phoebe Wang (phoebewang)

Changes

Found during review #120766

Full diff: https://github.com/llvm/llvm-project/pull/120927.diff

4 Files Affected:

(modified) clang/lib/Headers/avx10_2_512convertintrin.h (+2-2)
(modified) clang/lib/Headers/avx10_2convertintrin.h (+2-2)
(modified) clang/test/CodeGen/X86/avx10_2_512convert-builtins.c (+2-2)
(modified) clang/test/CodeGen/X86/avx10_2convert-builtins.c (+2-2)

diff --git a/clang/lib/Headers/avx10_2_512convertintrin.h b/clang/lib/Headers/avx10_2_512convertintrin.h
index a34e135fa30473..60a5b1ef4548d8 100644
--- a/clang/lib/Headers/avx10_2_512convertintrin.h
+++ b/clang/lib/Headers/avx10_2_512convertintrin.h
@@ -308,13 +308,13 @@ static __inline __m512h __DEFAULT_FN_ATTRS512 _mm512_cvtpbf8_ph(__m256i __A) {
 }
 
 static __inline __m512h __DEFAULT_FN_ATTRS512
-_mm512_mask_cvtpbf8_ph(__m512h __S, __mmask16 __U, __m256i __A) {
+_mm512_mask_cvtpbf8_ph(__m512h __S, __mmask32 __U, __m256i __A) {
   return _mm512_castsi512_ph(
       _mm512_mask_slli_epi16((__m512i)__S, __U, _mm512_cvtepi8_epi16(__A), 8));
 }
 
 static __inline __m512h __DEFAULT_FN_ATTRS512
-_mm512_maskz_cvtpbf8_ph(__mmask16 __U, __m256i __A) {
+_mm512_maskz_cvtpbf8_ph(__mmask32 __U, __m256i __A) {
   return _mm512_castsi512_ph(
       _mm512_slli_epi16(_mm512_maskz_cvtepi8_epi16(__U, __A), 8));
 }
diff --git a/clang/lib/Headers/avx10_2convertintrin.h b/clang/lib/Headers/avx10_2convertintrin.h
index 134adb2850c8de..efe8477cbbf9be 100644
--- a/clang/lib/Headers/avx10_2convertintrin.h
+++ b/clang/lib/Headers/avx10_2convertintrin.h
@@ -580,13 +580,13 @@ static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_cvtpbf8_ph(__m128i __A) {
 }
 
 static __inline__ __m256h __DEFAULT_FN_ATTRS256
-_mm256_mask_cvtpbf8_ph(__m256h __S, __mmask8 __U, __m128i __A) {
+_mm256_mask_cvtpbf8_ph(__m256h __S, __mmask16 __U, __m128i __A) {
   return _mm256_castsi256_ph(
       _mm256_mask_slli_epi16((__m256i)__S, __U, _mm256_cvtepi8_epi16(__A), 8));
 }
 
 static __inline__ __m256h __DEFAULT_FN_ATTRS256
-_mm256_maskz_cvtpbf8_ph(__mmask8 __U, __m128i __A) {
+_mm256_maskz_cvtpbf8_ph(__mmask16 __U, __m128i __A) {
   return _mm256_castsi256_ph(
       _mm256_slli_epi16(_mm256_maskz_cvtepi8_epi16(__U, __A), 8));
 }
diff --git a/clang/test/CodeGen/X86/avx10_2_512convert-builtins.c b/clang/test/CodeGen/X86/avx10_2_512convert-builtins.c
index e71cc0c9ad6b02..6662e0cbf8a913 100644
--- a/clang/test/CodeGen/X86/avx10_2_512convert-builtins.c
+++ b/clang/test/CodeGen/X86/avx10_2_512convert-builtins.c
@@ -299,7 +299,7 @@ __m512h test_mm512_cvtpbf8_ph(__m256i A) {
   return _mm512_cvtpbf8_ph(A);
 }
 
-__m512h test_mm512_mask_cvtpbf8_ph(__m512h S, __mmask16 M, __m256i A) {
+__m512h test_mm512_mask_cvtpbf8_ph(__m512h S, __mmask32 M, __m256i A) {
   // CHECK-LABEL: @test_mm512_mask_cvtpbf8_ph
   // CHECK: sext <32 x i8> %{{.*}} to <32 x i16>
   // CHECK: @llvm.x86.avx512.pslli.w.512
@@ -308,7 +308,7 @@ __m512h test_mm512_mask_cvtpbf8_ph(__m512h S, __mmask16 M, __m256i A) {
   return _mm512_mask_cvtpbf8_ph(S, M, A);
 }
 
-__m512h test_mm512_maskz_cvtpbf8_ph(__mmask16 M, __m256i A) {
+__m512h test_mm512_maskz_cvtpbf8_ph(__mmask32 M, __m256i A) {
   // CHECK-LABEL: @test_mm512_maskz_cvtpbf8_ph
   // CHECK: sext <32 x i8> %{{.*}} to <32 x i16>
   // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
diff --git a/clang/test/CodeGen/X86/avx10_2convert-builtins.c b/clang/test/CodeGen/X86/avx10_2convert-builtins.c
index 8086c1b5d33993..7121b28719c077 100644
--- a/clang/test/CodeGen/X86/avx10_2convert-builtins.c
+++ b/clang/test/CodeGen/X86/avx10_2convert-builtins.c
@@ -593,7 +593,7 @@ __m128h test_mm_cvtpbf8_ph(__m128i A) {
   return _mm_cvtpbf8_ph(A);
 }
 
-__m128h test_mm_mask_cvtpbf8_ph(__m128h S, __mmask8 M, __m128i A) {
+__m128h test_mm_mask_cvtpbf8_ph(__m128h S, __mmask16 M, __m128i A) {
   // CHECK-LABEL: @test_mm_mask_cvtpbf8_ph
   // CHECK: sext <8 x i8> %{{.*}} to <8 x i16>
   // CHECK: @llvm.x86.sse2.pslli.w
@@ -602,7 +602,7 @@ __m128h test_mm_mask_cvtpbf8_ph(__m128h S, __mmask8 M, __m128i A) {
   return _mm_mask_cvtpbf8_ph(S, M, A);
 }
 
-__m128h test_mm_maskz_cvtpbf8_ph(__mmask8 M, __m128i A) {
+__m128h test_mm_maskz_cvtpbf8_ph(__mmask16 M, __m128i A) {
   // CHECK-LABEL: @test_mm_maskz_cvtpbf8_ph
   // CHECK: sext <8 x i8> %{{.*}} to <8 x i16>
   // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}

Found during review llvm#120766

llvm-ci · 2024-12-23T09:25:29Z

LLVM Buildbot has detected a new failure on builder lldb-aarch64-ubuntu running on linaro-lldb-aarch64-ubuntu while building clang at step 6 "test".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/59/builds/10268

Here is the relevant piece of the build log for reference:

Step 6 (test) failure: build (failure)
...
PASS: lldb-unit :: Utility/./UtilityTests/1/8 (2060 of 2069)
PASS: lldb-unit :: Utility/./UtilityTests/4/8 (2061 of 2069)
PASS: lldb-unit :: ValueObject/./LLDBValueObjectTests/1/3 (2062 of 2069)
PASS: lldb-unit :: ValueObject/./LLDBValueObjectTests/2/3 (2063 of 2069)
PASS: lldb-unit :: tools/lldb-server/tests/./LLDBServerTests/0/2 (2064 of 2069)
PASS: lldb-unit :: tools/lldb-server/tests/./LLDBServerTests/1/2 (2065 of 2069)
PASS: lldb-unit :: Target/./TargetTests/11/14 (2066 of 2069)
PASS: lldb-unit :: Host/./HostTests/2/13 (2067 of 2069)
PASS: lldb-unit :: Process/gdb-remote/./ProcessGdbRemoteTests/8/9 (2068 of 2069)
UNRESOLVED: lldb-api :: tools/lldb-server/TestLldbGdbServer.py (2069 of 2069)
******************** TEST 'lldb-api :: tools/lldb-server/TestLldbGdbServer.py' FAILED ********************
Script:
--
/usr/bin/python3.10 /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/llvm-project/lldb/test/API/dotest.py -u CXXFLAGS -u CFLAGS --env LLVM_LIBS_DIR=/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/./lib --env LLVM_INCLUDE_DIR=/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/include --env LLVM_TOOLS_DIR=/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/./bin --arch aarch64 --build-dir /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/lldb-test-build.noindex --lldb-module-cache-dir /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/lldb-test-build.noindex/module-cache-lldb/lldb-api --clang-module-cache-dir /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/lldb-test-build.noindex/module-cache-clang/lldb-api --executable /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/./bin/lldb --compiler /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/./bin/clang --dsymutil /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/./bin/dsymutil --make /usr/bin/gmake --llvm-tools-dir /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/./bin --lldb-obj-root /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/tools/lldb --lldb-libs-dir /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/./lib /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/llvm-project/lldb/test/API/tools/lldb-server -p TestLldbGdbServer.py
--
Exit Code: 1

Command Output (stdout):
--
lldb version 20.0.0git (https://github.com/llvm/llvm-project.git revision 113177f98b9d7ac6edfa833d55ad6ad6fd4a0cbf)
  clang revision 113177f98b9d7ac6edfa833d55ad6ad6fd4a0cbf
  llvm revision 113177f98b9d7ac6edfa833d55ad6ad6fd4a0cbf
Skipping the following test categories: ['libc++', 'dsym', 'gmodules', 'debugserver', 'objc']

--
Command Output (stderr):
--
UNSUPPORTED: LLDB (/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/clang-aarch64) :: test_Hc_then_Csignal_signals_correct_thread_launch_debugserver (TestLldbGdbServer.LldbGdbServerTestCase) (test case does not fall in any category of interest for this run) 
PASS: LLDB (/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/clang-aarch64) :: test_Hc_then_Csignal_signals_correct_thread_launch_llgs (TestLldbGdbServer.LldbGdbServerTestCase)
PASS: LLDB (/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/clang-aarch64) :: test_Hg_fails_on_another_pid_llgs (TestLldbGdbServer.LldbGdbServerTestCase)
PASS: LLDB (/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/clang-aarch64) :: test_Hg_fails_on_minus_one_pid_llgs (TestLldbGdbServer.LldbGdbServerTestCase)
PASS: LLDB (/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/clang-aarch64) :: test_Hg_fails_on_zero_pid_llgs (TestLldbGdbServer.LldbGdbServerTestCase)
UNSUPPORTED: LLDB (/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/clang-aarch64) :: test_Hg_switches_to_3_threads_launch_debugserver (TestLldbGdbServer.LldbGdbServerTestCase) (test case does not fall in any category of interest for this run) 
PASS: LLDB (/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/clang-aarch64) :: test_Hg_switches_to_3_threads_launch_llgs (TestLldbGdbServer.LldbGdbServerTestCase)
UNSUPPORTED: LLDB (/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/clang-aarch64) :: test_P_and_p_thread_suffix_work_debugserver (TestLldbGdbServer.LldbGdbServerTestCase) (test case does not fall in any category of interest for this run) 
PASS: LLDB (/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/clang-aarch64) :: test_P_and_p_thread_suffix_work_llgs (TestLldbGdbServer.LldbGdbServerTestCase)
UNSUPPORTED: LLDB (/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/clang-aarch64) :: test_P_writes_all_gpr_registers_debugserver (TestLldbGdbServer.LldbGdbServerTestCase) (test case does not fall in any category of interest for this run) 
PASS: LLDB (/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/clang-aarch64) :: test_P_writes_all_gpr_registers_llgs (TestLldbGdbServer.LldbGdbServerTestCase)
UNSUPPORTED: LLDB (/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/clang-aarch64) :: test_attach_commandline_continue_app_exits_debugserver (TestLldbGdbServer.LldbGdbServerTestCase) (test case does not fall in any category of interest for this run) 
Program aborted due to an unhandled Error:
Operation not permitted
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace.
Stack dump:
0.	Program arguments: /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/lldb-server gdbserver --attach=1335856 --reverse-connect [127.0.0.1]:55407
 #0 0x0000aaaaddccfc40 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/lldb-server+0xb1fc40)
 #1 0x0000aaaaddccdc70 llvm::sys::RunSignalHandlers() (/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/lldb-server+0xb1dc70)
 #2 0x0000aaaaddcd0350 SignalHandler(int) Signals.cpp:0:0
 #3 0x0000ffff8c6087dc (linux-vdso.so.1+0x7dc)
 #4 0x0000ffff8be0f200 __pthread_kill_implementation ./nptl/pthread_kill.c:44:76

Found during work on #120927. This caused the compiler to silently ignore half of the mask in the affected intrinsics.

Found during work on llvm#120927. This caused the compiler to silently ignore half of the mask in the affected intrinsics. (cherry picked from commit af522c5)

Found during work on llvm#120927. This caused the compiler to silently ignore half of the mask in the affected intrinsics.

phoebewang requested review from FreddyLeaf and RKSimon December 23, 2024 01:07

llvmbot added clang Clang issues not falling into any other category backend:X86 clang:headers Headers provided by Clang, e.g. for intrinsics labels Dec 23, 2024

phoebewang force-pushed the bf16 branch from 13176bd to 608ce20 Compare December 23, 2024 01:09

[X86][AVX10.2] Fix wrong mask bits in cvtpbf8_ph intrinsics

2b9cde2

Found during review llvm#120766

phoebewang force-pushed the bf16 branch from 608ce20 to 2b9cde2 Compare December 23, 2024 01:10

RKSimon approved these changes Dec 23, 2024

View reviewed changes

phoebewang merged commit 113177f into llvm:main Dec 23, 2024
8 checks passed

phoebewang deleted the bf16 branch December 23, 2024 09:14

mikolaj-pirog mentioned this pull request Feb 11, 2025

[AVX10.2] Fix wrong mask casting in some convert intrinsics #126627

Merged

phoebewang pushed a commit that referenced this pull request Feb 11, 2025

[AVX10.2] Fix wrong mask casting in some convert intrinsics (#126627)

af522c5

Found during work on #120927. This caused the compiler to silently ignore half of the mask in the affected intrinsics.

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[X86][AVX10.2] Fix wrong mask bits in cvtpbf8_ph intrinsics #120927

[X86][AVX10.2] Fix wrong mask bits in cvtpbf8_ph intrinsics #120927

Uh oh!

phoebewang commented Dec 23, 2024

Uh oh!

llvmbot commented Dec 23, 2024 •

edited

Loading

Uh oh!

Uh oh!

llvm-ci commented Dec 23, 2024

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

4 participants

[X86][AVX10.2] Fix wrong mask bits in cvtpbf8_ph intrinsics #120927

[X86][AVX10.2] Fix wrong mask bits in cvtpbf8_ph intrinsics #120927

Uh oh!

Conversation

phoebewang commented Dec 23, 2024

Uh oh!

llvmbot commented Dec 23, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

Uh oh!

llvm-ci commented Dec 23, 2024

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

4 participants

llvmbot commented Dec 23, 2024 •

edited

Loading